import requests
import json
from lxml import etree

# Module-level accumulator (a list, despite the name): handle_data() appends
# one dict per poem to it and main() serialises the whole list to data.json.
dict_data = []

def get_data(id):
    """Download one listing page of gushici.net and parse it with lxml.

    Args:
        id: Page number. Page 1 is fetched from ``index.html``; every other
            page is fetched from ``index_<id>.html``.

    Returns:
        The result of ``etree.HTML`` applied to the UTF-8 decoded page body.

    Raises:
        requests.RequestException: If the request fails or times out.
        UnicodeDecodeError: If the response body is not valid UTF-8.
    """
    # NOTE: the parameter name shadows the builtin ``id``; it is kept so that
    # existing keyword callers (``get_data(id=3)``) continue to work.
    if id == 1:
        url = 'https://www.gushici.net/index.html'
    else:
        url = f'https://www.gushici.net/index_{id}.html'

    # Without a timeout a single stalled connection blocks the crawl forever.
    response = requests.get(url, timeout=30)

    # Decode the raw bytes directly. Round-tripping ``response.text`` through
    # ISO-8859-1 only recovers the bytes when requests itself guessed
    # ISO-8859-1; if the server declares a charset, ``.text`` already holds
    # Chinese characters and the re-encode raises UnicodeEncodeError.
    return etree.HTML(response.content.decode('utf-8'))

def handle_data(html):
    """Extract the poems from a parsed listing page into ``dict_data``.

    Each ``div`` directly under ``/html/body/div[2]/div[1]`` is treated as one
    poem container. Containers without a title node are skipped. For every
    remaining container a dict with the keys ``title``, ``dynasty``,
    ``author`` and ``content`` is appended to the module-level ``dict_data``.

    Args:
        html: Parsed page exposing an lxml-style ``xpath`` method, as returned
            by ``get_data``.

    Returns:
        None. Results are appended to ``dict_data`` as a side effect.
    """
    # Query every field relative to its own container. Collecting titles,
    # dynasties and authors as three page-wide lists and joining them by
    # index misattributes (or raises IndexError on) every poem that follows
    # an entry with a missing dynasty/author link.
    for item in html.xpath('/html/body/div[2]/div[1]/div'):
        title = item.xpath('./div/p[1]/a/b/text()')
        if not title:
            # Not a poem container; nothing to record for this div.
            continue

        dynasty = item.xpath('./div/p[2]/a[1]/text()')
        author = item.xpath('./div/p[2]/a[2]/text()')

        dict_data.append({
            'title': title[0],
            # Missing links are recorded as empty strings so every record
            # keeps the same set of string-valued keys.
            'dynasty': dynasty[0] if dynasty else '',
            'author': author[0] if author else '',
            # List of text nodes of every <a> below the container's inner div.
            'content': item.xpath('./div/div//a/text()'),
        })
    
        
    
    

def main(first_page=1, last_page=99):
    """Crawl a range of listing pages and write the results to ``data.json``.

    Every page from ``first_page`` to ``last_page`` (inclusive) is fetched
    with ``get_data`` and processed with ``handle_data``; a progress line is
    printed after each page. The accumulated ``dict_data`` list is then
    written to ``data.json`` as UTF-8 JSON with non-ASCII characters kept
    verbatim.

    Args:
        first_page: First page number to crawl. Defaults to 1.
        last_page: Last page number to crawl, inclusive. Defaults to 99,
            matching the original hard-coded ``range(1, 100)``.

    Returns:
        None.
    """
    try:
        for page in range(first_page, last_page + 1):
            handle_data(get_data(page))
            print(f'第{page}页爬取完成')
    finally:
        # Write in ``finally`` so that a failure (or Ctrl-C) on a late page
        # does not throw away every page that was already scraped; the
        # exception still propagates after the file is written.
        with open('data.json', 'w', encoding='utf-8') as f:
            json.dump(dict_data, f, ensure_ascii=False)
    

# Start the crawl only when this file is executed as a script, so importing
# the module does not trigger any network requests.
if __name__ == '__main__':
    main()